# replication code for analyzing
# with pivot scale dimensions as outcomes

library(here)

here::here()

source("code/startup.R")
library(parrot)
library(modelsummary)

# function to calculate confidence interval of coefficient estimate
get_confint <- function(est, se, n, npar, alpha = .05){
  
  # degrees of freedom is number of observations - number of params
  df <- n - npar
  
  # set t critical value from alpha
  t.val <- qt((1-(alpha/2)), df)
  
  # bounds are est +/- critical value number of SEs
  lwr <- est - t.val*se
  upr <- est + t.val*se
  
  sig <- 0
  if(lwr > 0 | upr < 0){
    sig <- 1
  }
  
  return(data.frame(lower = lwr,
                    coef = est,
                    upper = upr,
                    statsig = sig))
}

get_keywords_custom <- function (scores, n_dimensions, n_words = 15, stretch = 3, capture_output = FALSE, 
                                 pivots_only = TRUE, topic) 
{
  all_keywords <- list()
  if (stretch%%2 != 1) 
    stop("Please enter odd integer for \"stretch\"")
  for (i in if (length(n_dimensions) == 1) {
    1:n_dimensions
  }
  else {
    n_dimensions
  }) {
    general_keywords <- scores$vocab[order(scores$pivot_scores[, 
                                                               i + 1] * sqrt(rowSums(scores$pivot_scores[, -1]^2)), 
                                           decreasing = TRUE)]
    specific_keywords <- scores$vocab[order(scores$word_scores[, 
                                                               i + 1]^(stretch) * sqrt(rowSums(scores$pivot_scores[, 
                                                                                                                   -1]^2)), decreasing = TRUE)]
    if (pivots_only) {
      keywords <- data.frame(head(rev(general_keywords), 
                                  n = n_words), head(general_keywords, n = n_words))
      names(keywords) <- c("pivots (-)", "(+) pivots")
    }
    else {
      keywords <- data.frame(head(rev(specific_keywords), 
                                  n = n_words), head(rev(general_keywords), n = n_words), 
                             head(general_keywords, n = n_words), head(specific_keywords, 
                                                                       n = n_words))
      names(keywords) <- c("scores (-)", "pivots (-)", 
                           "(+) pivots", "(+) scores")
    }
    if (capture_output) {
      all_keywords[[paste0("D", i)]] <- keywords
    }
    else {
      if (!requireNamespace("knitr", quietly = TRUE)) {
        cat("\nDimension", i, "keywords\n\n")
        print(keywords, row.names = F)
        cat("\n")
      }
      else {
        print(knitr::kable(keywords, align = "c", format = "pandoc", 
                           caption = paste("Dimension", i, "keywords: ", topic)))
        cat("\n")
      }
    }
  }
  if (capture_output) {
    return(all_keywords)
  }
}

# function to plot pivot scaling results
plot_keywords_custom <- function (scores, 
                                  scoredat = NULL,
                                  x_dimension = 1, y_dimension = 2, 
                                  q_cutoff = 0.9, 
                                  plot_density = FALSE, 
                                  plot_density2d = FALSE,
                                  plot_alpha = .7,
                                  density_by = "running",
                                  unstretch = FALSE, color = FALSE,
                                  subjname = "subject"){
  if(plot_density == TRUE & plot_density2d == TRUE){
    stop("plot_density and plot_density2d cannot both be true")
  }
  if(plot_density2d == TRUE & is.null(scoredat)){
    stop("must supply data for plotting 2d density")
  }
  
  if (unstretch) {
    scores$word_scores <- sweep(scores$word_scores, 1, 
                                sqrt(rowSums((scores$importance[-1] * scores$pivot_scores[, -1])^2)) + 1, `/`)
  }
  word_scores <- data.frame(scores$word_scores)
  word_counts <- scores$word_counts
  above_cutoff <- word_counts > quantile(word_counts, q_cutoff)
  x_dimension <- x_dimension + 1
  y_dimension <- y_dimension + 1
  if (color & !("color" %in% names(scores))) {
    scores$color <- factor(kmeans(scores$word_scores[, 2:11], 
                                  centers = 5)$cluster)
  }
  if (!color & !plot_density2d) {
    g <- ggplot2::ggplot() + 
      ggplot2::geom_text(data = word_scores[above_cutoff,], 
                         ggplot2::aes(x = word_scores[above_cutoff, x_dimension], 
                                      y = word_scores[above_cutoff, y_dimension], 
                                      label = scores$vocab[above_cutoff])) + 
      ggplot2::xlab(paste("Dimension:", x_dimension - 1)) + 
      ggplot2::ylab(paste("Dimension:", y_dimension - 1)) + 
      ggplot2::guides(size = "none") + ggplot2::theme_classic() + 
      ggplot2::xlim(-max(abs(word_scores[above_cutoff,  x_dimension])), 
                    max(abs(word_scores[above_cutoff, x_dimension]))) + 
      ggplot2::ylim(-max(abs(word_scores[above_cutoff, y_dimension])), 
                    max(abs(word_scores[above_cutoff, y_dimension])))+
      ggplot2::ggtitle(paste0("Embedded dimensions in Run for Something respondents' statements of interest"),
                       subtitle = paste0("Top ", 100*(1-q_cutoff), "% of words shown"))
  }
  if(!color & plot_density2d){
    
    xname <- paste0("X", x_dimension - 1)
    yname <- paste0("X", y_dimension - 1)
    
    g <- ggplot2::ggplot() + 
      ggplot2::geom_density_2d(data = scoredat,
                               ggplot2::aes(x = scoredat %>% pull(xname),
                                            y = scoredat %>% pull(yname),
                                            lty = scoredat %>% pull(density_by) %>% factor),
                               alpha = plot_alpha)+
      ggplot2::geom_text(data = word_scores[above_cutoff,], 
                         ggplot2::aes(x = word_scores[above_cutoff, x_dimension], 
                                      y = word_scores[above_cutoff, y_dimension], 
                                      label = scores$vocab[above_cutoff]),
                         alpha = plot_alpha) + 
      ggplot2::xlab(paste("Dimension:", x_dimension - 1)) + 
      ggplot2::ylab(paste("Dimension:", y_dimension - 1)) + 
      ggplot2::guides(size = "none") + ggplot2::theme_classic() + 
      ggplot2::xlim(-max(abs(word_scores[above_cutoff,  x_dimension])), 
                    max(abs(word_scores[above_cutoff, x_dimension]))) + 
      ggplot2::ylim(-max(abs(word_scores[above_cutoff, y_dimension])), 
                    max(abs(word_scores[above_cutoff, y_dimension])))+
      ggplot2::ggtitle(paste0("Embedded dimensions in Run for Something respondents' statements of interest"),
                       subtitle = paste0("Top ", 100*(1-q_cutoff), "% of words plotted by word scores in two dimensions\nOverlaid on document score densities for candidates and non-candidates in same dimensions"))
  }
  else {
    g <- ggplot2::ggplot() + 
      ggplot2::geom_text(data = word_scores[above_cutoff, ], 
                         ggplot2::aes(x = word_scores[above_cutoff, x_dimension], 
                                      y = word_scores[above_cutoff, y_dimension], 
                                      label = scores$vocab[above_cutoff], 
                                      color = scores$color[above_cutoff])) + 
      ggplot2::xlab(paste("Dimension:", x_dimension - 1)) + 
      ggplot2::ylab(paste("Dimension:", y_dimension - 1)) + 
      ggplot2::guides(size = "none", color = "none") + ggplot2::theme_classic() + 
      ggplot2::xlim(-max(abs(word_scores[above_cutoff, 
                                         x_dimension])), 
                    max(abs(word_scores[above_cutoff, x_dimension]))) + 
      ggplot2::ylim(-max(abs(word_scores[above_cutoff, y_dimension])), 
                    max(abs(word_scores[above_cutoff,  y_dimension])))+
      ggplot2::ggtitle(paste0("Embedded dimensions in Run for Something respondents' statements of interest"),
                       subtitle = paste0("Top ", 100*(1-q_cutoff), "% of words shown"))
  }
  if (!plot_density) {
    return(g)
  }else{
    gridExtra::grid.arrange(g, ggplot2::ggplot() + 
                              ggplot2::geom_density(ggplot2::aes(x = word_scores[, x_dimension])) +
                              ggplot2::xlab(paste("Dimension:", x_dimension - 1)) + 
                              ggplot2::theme_classic(), 
                            ggplot2::ggplot() + 
                              ggplot2::geom_density(ggplot2::aes(x = word_scores[,  y_dimension])) + 
                              ggplot2::xlab(paste("Dimension",  y_dimension - 1)) + 
                              ggplot2::theme_classic(), 
                            layout_matrix = rbind(c(1, 1, 2), c(1, 1, 3)))
  }
}

plot_dist <- function(data = rfs_parrot_scores, 
                      dim = 1, 
                      dimname = "General Interest (-) vs. Specific Issues (+)",
                      byvar = NULL,
                      byname = NULL){
  data$out <- data %>% pull(paste0("X", dim))
  data <- data %>% filter(!is.na(out))
  if(!is.null(byvar)){
    data$byvar <- data %>% pull(byvar)
    
    plot <-  
      data %>%
      ggplot(aes(x = out, lty = factor(byvar)))+
      geom_density()+
      labs(x = "Document Score", y = "Density",
           title = paste0("Distribution of Document Scores by ", byname),
           subtitle = paste0("For Dimension ", dim, ": ", dimname))
  }else{
    plot <-  
      data %>%
      ggplot(aes(x = out))+
      geom_density()+
      labs(x = 'Document Score', y = "Density",
           title = paste0("Distribution of Document Scores"),
           subtitle = paste0("For Dimension ", dim, ": ", dimname))
  }
  return(plot)
}


rfs_parrot_scores <- readr::read_csv("data/dat_scored.csv")
rfs_parrot_scores_nocutoff <- readr::read_csv("data/dat_scored_nocutoffs.csv")

rfs_parrot_scores %>%
  filter(!is.na(X1)) %>%
  group_by(USR) %>%
  summarise(MX1 = mean(X1),
            MX2 = mean(X2),
            MX3 = mean(X3))
vars_map <- c( "cutoff_doc" = "Cut-off Document",
               "std.age" = "Age (SD)",
               "college.catmnc" = "Education: Non-College",
               "college.catmuo" = "Education: Not Observed",
               "college.catnonmatched" = "Not Matched to Voter File",
               "inc.catbtw50and100" = "Income: Between $50k and $100k",
               "inc.catmuo" = "Income: Not Observed",
               "inc.catover250" = "Income: Over $250k",
               "inc.catunder50" = "Income: Under $50k",
               "USRS" = "Suburban (vs. Rural)",
               "USRU" = "Urban (vs. Rural)",
               "USRunk" = "Unknown Geogrpahy (vs. Rural)",
               "race.comb" = "p(White)",
               "gen.comb.imp" = "p(Male)")
vars_map_run <- c( "cutoff_doc" = "Cut-off Document",
               "std.age" = "Age (SD)",
               "college.catmnc" = "Education: Non-College",
               "college.catmuo" = "Education: Not Observed",
               "college.catnonmatched" = "Not Matched to Voter File",
               "inc.catbtw50and100" = "Income: Between $50k and $100k",
               "inc.catmuo" = "Income: Matched Not Observed",
               "inc.catover250" = "Income: Over $250k",
               "inc.catunder50" = "Income: Under $50k",
               "USRS" = "Suburban (vs. Rural)",
               "USRU" = "Urban (vs. Rural)",
               "USRunk" = "Unknown Geogrpahy (vs. Rural)",
               "race.comb" = "p(White)",
               "gen.comb.imp" = "p(Male)",
               "X1" = "First Dimension (SD)\n(General Interest - Specific Issues)",
               "X2" = "Second Dimension (SD)\n(Core Values - Political Opportunities)",
               "X3" = "Third Dimension (SD)\n(Progressive Populism - Personal Background)")

mod1 <- fixest::feols(X1 ~ cutoff_doc + std.age + college.cat + inc.cat + USR + 
             race.comb + gen.comb.imp + USR,
           data = rfs_parrot_scores)
s1 <- summary(mod1)

mod2 <- fixest::feols(X2 ~ cutoff_doc + std.age + college.cat + inc.cat + USR + 
             race.comb + gen.comb.imp + USR,
           data = rfs_parrot_scores)
s2 <- summary(mod2)

mod3 <- fixest::feols(X3 ~ cutoff_doc + std.age + college.cat + inc.cat + USR + 
             race.comb + gen.comb.imp + USR,
           data = rfs_parrot_scores)
s3 <- summary(mod3)

lm1 <- lm(X1 ~ cutoff_doc + std.age + college.cat + inc.cat + USR + 
            race.comb + gen.comb.imp + USR,
          data = rfs_parrot_scores)
lm2 <- lm(X2 ~ cutoff_doc + std.age + college.cat + inc.cat + USR + 
            race.comb + gen.comb.imp + USR,
          data = rfs_parrot_scores)
lm3 <- lm(X3 ~ cutoff_doc + std.age + college.cat + inc.cat + USR + 
            race.comb + gen.comb.imp + USR,
          data = rfs_parrot_scores)

lm1.nc <- lm(X1 ~ running + std.age + college.cat + inc.cat + USR + 
            race.comb + gen.comb.imp + USR,
          data = rfs_parrot_scores %>% filter(cutoff_doc == 0))
lm2.nc <- lm(X2 ~ running + std.age + college.cat + inc.cat + USR + 
            race.comb + gen.comb.imp + USR,
          data = rfs_parrot_scores %>% filter(cutoff_doc == 0))
lm3.nc <- lm(X3 ~ running + std.age + college.cat + inc.cat + USR + 
            race.comb + gen.comb.imp + USR,
          data = rfs_parrot_scores %>% filter(cutoff_doc == 0))

mod_run_basic <- fixest::feglm(running ~ cutoff_doc + X1 + X2 + X3,
                      data = rfs_parrot_scores %>%
                        mutate_at(.vars = dplyr::vars(starts_with("X")),
                                  .funs = scale), 
                      family = "binomial")
mod_run_full <- fixest::feglm(running ~ cutoff_doc + std.age + college.cat + inc.cat + USR + 
                           race.comb + gen.comb.imp + USR +
                           X1 + X2 + X3,
                         data = rfs_parrot_scores %>%
                           mutate_at(.vars = dplyr::vars(starts_with("X")),
                                     .funs = scale), 
                         family = "binomial")
mod_run_basic.nc <- fixest::feglm(running ~ cutoff_doc + X1 + X2 + X3,
                               data = rfs_parrot_scores %>% 
                                 filter(cutoff_doc == 0) %>%
                                 mutate_at(.vars = dplyr::vars(starts_with("X")),
                                           .funs = scale), 
                               family = "binomial")
mod_run_full.nc <- fixest::feglm(running ~ cutoff_doc + std.age + college.cat + inc.cat + USR + 
                                race.comb + gen.comb.imp + USR +
                                X1 + X2 + X3,
                              data = rfs_parrot_scores %>% 
                                filter(cutoff_doc == 0) %>%
                                mutate_at(.vars = dplyr::vars(starts_with("X")),
                                          .funs = scale), 
                              family = "binomial")



s_run_basic <- summary(mod_run_basic)
s_run_full <- summary(mod_run_full)

modelsummary::modelsummary(
  list(
    First = lm1,
    Second = lm2,
    Third = lm3
  ),
  stars = c('*' = .1, '**' = .05, '***' = .01),
  coef_map = vars_map,
  output = "output/correlates_of_dims_table.tex")

modelsummary::modelsummary(
  list(
    First = lm1.nc,
    Second = lm2.nc,
    Third = lm3.nc
  ),
  stars = c('*' = .1, '**' = .05, '***' = .01),
  coef_map = vars_map,
  output = "output/correlates_of_dims_table_nocutoff.tex")

modelsummary::modelsummary(
  list(
    `Basic` = mod_run_basic,
    `Full` = mod_run_full,
    `Basic (No Cut-off Documents)` = mod_run_basic.nc,
    `Full (No Cut-off Documents)` = mod_run_full.nc
  ),
  stars = c('*' = .1, '**' = .05, '***' = .01),
  coef_map = vars_map_run,
  output = "output/mod_run.tex")


save(lm1, lm2, lm3, lm1.nc, lm2.nc, lm3.nc, mod_run_basic, mod_run_full, mod_run_basic.nc, mod_run_full.nc,
     file = "output/correlates_of_dims_mods.RData")

coefs_1 <- data.frame(var = names(mod1$coefficients))
#coefs_1 <- coefs_1[-which(coefs_1$var == "inc.catnonmatched"),]
coefs_run_basic <- data.frame(var = names(mod_run_basic$coefficients))
coefs_run_full <- data.frame(var = names(mod_run_full$coefficients))

getcf1 <- map_df(coefs_1$var, function(x){
  get_confint(est = s1$coefficients[names(s1$coefficients) == x],
              se = s1$se[names(s1$coefficients) == x],
              n = s1$nobs,
              npar = s1$nparams) %>%
    mutate(var = x,
           outcome = "X1")
})
getcf2 <- map_df(coefs_1$var, function(x){
  get_confint(est = s2$coefficients[names(s2$coefficients) == x],
              se = s2$se[names(s2$coefficients) == x],
              n = s2$nobs,
              npar = s2$nparams) %>%
    mutate(var = x,
           outcome = "X2")
})
getcf3 <- map_df(coefs_1$var, function(x){
  get_confint(est = s3$coefficients[names(s3$coefficients) == x],
              se = s3$se[names(s3$coefficients) == x],
              n = s3$nobs,
              npar = s3$nparams) %>%
    mutate(var = x,
           outcome = "X3")
})

getcf_run_basic <- map_df(coefs_run_basic$var, function(x){
  get_confint(est = s_run_basic$coefficients[names(s_run_basic$coefficients) == x],
              se = s_run_basic$se[names(s_run_basic$coefficients) == x],
              n = s_run_basic$nobs,
              npar = s_run_basic$nparams) %>%
    mutate(var = x,
           outcome = "Running")
})

getcf_run_full <- map_df(coefs_run_full$var, function(x){
  get_confint(est = s_run_full$coefficients[names(s_run_full$coefficients) == x],
              se = s_run_full$se[names(s_run_full$coefficients) == x],
              n = s_run_full$nobs,
              npar = s_run_full$nparams) %>%
    mutate(var = x,
           outcome = "Running")
})


nametab <- data.frame(modvar = c("(Intercept)",
                                 "cutoff_doc",
                                 "std.age",
                                 "college.catmnc",
                                 "college.catmuo",
                                 "college.catnonmatched",
                                 "inc.catbtw50and100",
                                 "inc.catmuo",
                                 "inc.catover250",
                                 "inc.catunder50",
                                 "USRS",
                                 "USRU",
                                 "USRunk",
                                 "race.comb",
                                 "gen.comb.imp"),
                      label = c("(Intercept)",
                                "Cut-off Document",
                                "Age (SD)",
                                "Non-College",
                                "College Unobserved",
                                "Not Matched to Voter File",
                                "Income: $50-100k",
                                "Income: Unobserved",
                                "Income: Over $250k+",
                                "Income: Under $50k",
                                "Geography: Suburban",
                                "Geography: Urban",
                                "Geography: Unknown",
                                "p(White)",
                                "p(Male)"))

nametab_run <- data.frame(modvar = c("(Intercept)",
                                 "cutoff_doc",
                                 "std.age",
                                 "college.catmnc",
                                 "college.catmuo",
                                 "college.catnonmatched",
                                 "inc.catbtw50and100",
                                 "inc.catmuo",
                                 "inc.catover250",
                                 "inc.catunder50",
                                 "USRS",
                                 "USRU",
                                 "USRunk",
                                 "race.comb",
                                 "gen.comb.imp",
                                 "X1","X2","X3"),
                      label = c("(Intercept)",
                                "Cut-off Document",
                                "Age (SD)",
                                "Non-College",
                                "College Unobserved",
                                "Not Matched to Voter File",
                                "Income: $50-100k",
                                "Income: Unobserved",
                                "Income: Over $250k+",
                                "Income: Under $50k",
                                "Geography: Suburban",
                                "Geography: Urban",
                                "Geography: Unknown",
                                "p(White)",
                                "p(Male)",
                                "First Dimension (SD)\n(General Interest - Specific Issues)",
                                "Second Dimension (SD)\n(Core Values - Political Opportunities)",
                                "Third Dimension (SD)\n(Progressive Populism - Personal Background)"))


library(ggforestplot)

coeftab <- bind_rows(getcf1, getcf2, getcf3) %>%
  left_join(nametab, by = c("var" = "modvar")) %>%
  filter(!var %in% c("(Intercept)","cutoff_doc","USRunk","college.catmuo","inc.catmuo")) %>%
  arrange(desc(coef))
xlim <- as.numeric(unique(fct_rev(fct_inorder(coeftab$label))))

coeftab_run <- 
  bind_rows(getcf_run_full %>%
              mutate(type = "Full"),
            getcf_run_basic %>%
              mutate(type = "Basic")) %>%
  left_join(nametab_run, by = c("var" = "modvar")) %>%
  filter(!var %in% c("(Intercept)","cutoff_doc","USRunk","college.catmuo","inc.catmuo")) %>%
  arrange(desc(coef))
xlim_run <- as.numeric(unique(fct_rev(fct_inorder(coeftab_run$label))))


coefplot <- 
  coeftab %>%
  ggplot(aes(x = fct_rev(fct_inorder(label)),
             y = coef, ymin = lower, ymax = upper,
             #pch = outcome,
             alpha = factor(statsig)))+
  facet_wrap(~outcome, nrow = 1, ncol = 3,
             labeller = as_labeller(c("X1" = "First Dimension\n(General Interest - Specific Issues)",
                                      "X2" = "Second Dimension\n(Core Values - Political Opportunities)",
                                      "X3" = "Third Dimension\n(Progressive Populism - Personal Background)")))+
  geom_pointrange(position = position_dodge(width =.5))+
  scale_alpha_manual(name = "",
                     breaks = c(0,1),
                     values = c(.3, 1),
                     labels = c("ns","s"))+
  geom_hline(yintercept = 0, lty = "dashed")+
  annotate("rect", xmin = xlim[1]-.5, xmax= xlim[2]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey") +
  annotate("rect", xmin = xlim[3]-.5, xmax= xlim[4]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey") +
  annotate("rect", xmin = xlim[5]-.5, xmax= xlim[6]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey") +
  annotate("rect", xmin = xlim[7]-.5, xmax= xlim[8]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey") +
  annotate("rect", xmin = xlim[9]-.5, xmax= xlim[10]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey") +
  coord_flip()+
  guides(alpha = "none")+
  labs(x = "Variable", y = "OLS Coefficient",
       title = "Correlates of Stated Motives",
       subtitle = "Estimates from OLS regressions with pivot scale dimensions as outcomes",
       caption = "Coefficients significant at p < .05 in black\nEducation and income reflect modeled estimates among respondents matched to voter file\nIntercept, cut-off document, unknown geography, unobserved education (matched to voter file), and unobserved income (matched to voter file) omitted for clarity")+
  theme_jg()+
  theme(plot.caption = element_text(hjust = 0))
ggsave(coefplot, file = "figures/correlates_of_dims.png", width = 14, height = 6)


coefplot_run <- 
  coeftab_run %>%
  ggplot(aes(x = fct_rev(fct_inorder(label)),
             y = coef, ymin = lower, ymax = upper,
             pch = type,
             alpha = factor(statsig)))+
  geom_pointrange(position = position_dodge(width =.5))+
  scale_alpha_manual(name = "",
                     breaks = c(0,1),
                     values = c(.3, 1),
                     labels = c("ns","s"))+
  geom_hline(yintercept = 0, lty = "dashed")+
  annotate("rect", xmin = xlim_run[1]-.5, xmax= xlim_run[2]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey") +
  annotate("rect", xmin = xlim_run[3]-.5, xmax= xlim_run[4]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey") +
  annotate("rect", xmin = xlim_run[5]-.5, xmax= xlim_run[6]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey") +
  annotate("rect", xmin = xlim_run[7]-.5, xmax= xlim_run[8]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey") +
  annotate("rect", xmin = xlim_run[9]-.5, xmax= xlim_run[10]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey") +
  annotate("rect", xmin = xlim_run[11]-.5, xmax= xlim_run[12]-.5, 
           ymin=-Inf, ymax=Inf, alpha=0.3, fill="grey")+
  coord_flip()+
  scale_shape_discrete(name = "Specification",
                       breaks = c("Basic","Full"),
                       labels = c("Text Dimensions Only",
                                  "Full"))+
  guides(alpha = "none")+
  labs(x = "Variable", y = "Logit Coefficient",
       title = "Correlates of Candidate Emergence",
       subtitle = "Estimates from logistic regression with candidate emergence as outcome",
       caption = "Coefficients significant at p < .05 in black\nEducation and income reflect modeled estimates among respondents matched to voter file\nIntercept, cut-off document, unknown geography, unobserved education (matched to voter file), and unobserved income (matched to voter file) omitted for clarity")+
  theme_jg()+
  theme(plot.caption = element_text(hjust = 0))
ggsave(coefplot_run, file = "figures/correlates_of_run.png", width = 16, height = 7)



